Primary ComponentsAnalysis
# -----------Large Attractiveness----------------
pcaDF <- attracGEO789VISIT %>% dplyr::select(-visits,-placekey, -DISTRICTID)
pca = prcomp(pcaDF, center = TRUE, scale. = TRUE)
names(pca)
## [1] "sdev" "rotation" "center" "scale" "x"
# decide the number of components
components = 1:ncol(pcaDF)
plot(pca$sdev ~ components, ylab = "PCA Standard Deviation", xlab = "Component", pch = 19, type = "b")
abline(v = 5, col = "red")
Figure6.1.3 Primary Components of Large Attractiveness
pca$rotation <- -1*pca$rotation
pca$x <- -1*pca$x
corrplot(t(pca$rotation[,1:5]), is.corr=FALSE)
Figure6.1.3 Primary Components of Large Attractiveness
# PC1: self-condition
# PC2: official attention
# PC3: residents' vibrancy
# PC4: outdoor characters
# PC5: accessibility
# -----------Small Attractiveness--------------
pcaDFSmall <- attracGEO789VISIT %>% dplyr::select(area,spraygroundNum,tennisCourtNum,treeHeight,treeArea,programNum,programCap)
pcaSmall = prcomp(pcaDFSmall, center = TRUE, scale. = TRUE)
names(pcaSmall)
## [1] "sdev" "rotation" "center" "scale" "x"
# decide the number of components
componentsSmall = 1:ncol(pcaDFSmall)
plot(pcaSmall$sdev ~ componentsSmall, ylab = "PCA Standard Deviation", xlab = "Component", pch = 19, type = "b")
abline(v = 5, col = "red")
Figure6.1.3 Primary Components of Small Attractiveness
pcaSmall$rotation <- -1*pcaSmall$rotation
pcaSmall$x <- -1*pcaSmall$x
corrplot(t(pcaSmall$rotation[,1:5]), is.corr=FALSE)
Figure6.1.3 Primary Components of Small Attractiveness
# PC1: self-condition
# PC2: official attention
# PC3: residents' vibrancy
# PC4: outdoor characters
# PC5: accessibility
# construct attractiveness matrix
largeRetrunPCAValue <- as.data.frame(pca$x)
smallRetrunPCAValue <- as.data.frame(pcaSmall$x)
LargePCA <- largeRetrunPCAValue %>% dplyr::select(PC1,PC2,PC3,PC4,PC5)
SmallPCA <- smallRetrunPCAValue %>% dplyr::select(PC1,PC2,PC3,PC4,PC5)
LargePCAAttTogo <- cbind(LargePCA, attracGEO789 %>% dplyr::select(placekey)) %>% st_sf()
smallPCAAttTogo <- cbind(SmallPCA, attracGEO789 %>% dplyr::select(placekey)) %>% st_sf()
# st_write(LargePCAAttTogo,"data/output/LargePCAAttTogo.GEOJSON")
# st_write(smallPCAAttTogo,"data/output/smallPCAAttTogo.GEOJSON")
# # parks
# modelPlaces<- as.character(attracGEO$geometry)
#
# modelPlaces <- modelPlaces %>% str_replace("\\(","") %>%
# str_replace("\\)","")%>%
# str_replace("c","")
#
# latParks = map(modelPlaces,function(x){
# medium = str_split(x,",")
# medium = medium[[1]][2]
# return(medium)
# })
#
# lngParks = map(modelPlaces,function(x){
# medium = str_split(x,",")
# medium = medium[[1]][1]
# return(medium)
# })
#
# latParks = unlist(latParks)
# lngParks = unlist(lngParks)
#
# modelPlacesFinal <- attracGEO %>%
# mutate(parkLat = latParks)%>%
# mutate(parkLng = lngParks) %>%
# st_drop_geometry()
#
# # census
# distCensusData<- as.character(CensusData$originGeometry)
#
# distCensusData <- distCensusData %>% str_replace("\\(","") %>%
# str_replace("\\)","")%>%
# str_replace("c","")
#
# latCensus = map(distCensusData,function(x){
# medium = str_split(x,",")
# medium = medium[[1]][2]
# return(medium)
# })
#
# lngCensus = map(distCensusData,function(x){
# medium = str_split(x,",")
# medium = medium[[1]][1]
# return(medium)
# })
#
# latCensus = unlist(latCensus)
# lngCensus = unlist(lngCensus)
#
# CensusData2 <- CensusData %>%
# mutate(bgLat = latCensus)%>%
# mutate(bgLng = lngCensus)
#
# #openstreetmap
# st_write(modelPlacesFinal,"modelPlacesFinal.csv")
# st_write(CensusData2,"CensusData2.csv")
modelPlacesFinal <- st_read("data/output/modelPlacesFinal.csv")
## Reading layer `modelPlacesFinal' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\modelPlacesFinal.csv'
## using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
CensusData2 <- st_read("CensusData2.csv")
## Reading layer `CensusData2' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\CensusData2.csv'
## using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
#
# #query the distance
# options(osrm.server = "http://127.0.0.1:5000/")
# options(digits=8)
#
# theDistanceMatrix=list()
# for(i in 1:nrow(CensusData2)){
# src = c(as.numeric(CensusData2["bgLng"][i,]),as.numeric(CensusData2["bgLat"][i,]))
# theVector = c()
# for (j in 1:nrow(modelPlacesFinal)){
# dst = c(as.numeric(modelPlacesFinal["parkLng"][j,]),as.numeric(modelPlacesFinal["parkLat"][j,]))
# medium = osrmRoute(src,dst,overview = "FALSE")
# theVector = append(theVector,medium[2])
# }
# theDistanceMatrix= append(theDistanceMatrix,list(theVector))
# print(paste0("finish ...",i,"/",nrow(CensusData2)))
# }
#
# theDistanceMatrix2 <- theDistanceMatrix
# daFrame <- as.data.frame(matrix(unlist(theDistanceMatrix2), nrow=length(theDistanceMatrix2), byrow=TRUE))
#st_write(daFrame,"data/output/theDistanceMatrixWhole.csv")
theDistanceMatrixWhole <- st_read("data/output/theDistanceMatrixWhole.csv")
## Reading layer `theDistanceMatrixWhole' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\theDistanceMatrixWhole.csv'
## using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
theDistanceMatrixWhole <- setNames(theDistanceMatrixWhole, modelPlacesFinal$placekey)
theDistanceMatrixWhole <-
merge(theDistanceMatrixWhole, CensusData2 %>% dplyr::select(GEOID), by = "row.names") %>%
dplyr::select(-Row.names)
theDistanceMatrixWhole2 <-
theDistanceMatrixWhole %>%
pivot_longer(!GEOID, names_to = "placekey", values_to = "distance")
# calculate probability
prob <- modelData %>%
filter(placekey %in% attracGEO789$placekey) %>%
group_by(origin) %>%
summarise(total = sum(visitors)) %>%
left_join(modelData, by="origin") %>%
mutate(probability = visitors/total) %>%
dplyr::select(placekey, origin, probability)
# Based on the distance panel
huffData <-
theDistanceMatrixWhole2 %>%
left_join(prob,by=c("placekey"="placekey","GEOID"="origin")) %>%
mutate(probability = replace_na(probability, 4.4e-5))
# here drop_na is to get the data of DIstrict 7,8,9
huffWithLargeAttract <-
huffData %>%
left_join(LargePCAAttTogo %>% st_drop_geometry(),by="placekey") %>%
drop_na()
huffWithSmallAttract <-
huffData %>%
left_join(smallPCAAttTogo %>% st_drop_geometry(),by="placekey") %>%
drop_na()
# st_write(huffWithSmallAttract, "data/output/RealModelData.csv")
# find the naics code that has most smae-day visits
# unnest the related_same_day_brand
# relatedBrand <-
# PPRmoves %>%
# select(placekey, related_same_day_brand, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(related_same_day_brand = future_map(related_same_day_brand, function(x){
# jsonlite::fromJSON(x) %>%
# as_tibble() %>%
# gather()
# }))
#
# relatedBrand <- relatedBrand %>%
# unnest(related_same_day_brand) %>%
# rename(relatedBrand =key ,visitors= value)
#
# st_write(relatedBrand,"data/output/relatedBrand.GeoJSON")
# relatedBrand <- st_read("data/output/relatedBrand.GeoJSON",crs=crs)
# relatedBrand <- relatedBrand %>%
# dplyr::select(-placekey,-month) %>%
# st_drop_geometry() %>%
# group_by(relatedBrand) %>%
# summarize(totalVisitors = sum(visitors))
#
# # connect this dataframe with naics code
# core_poi <- vroom("data/safegraph/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-CORE_POI-2021_11-2021-12-17/core_poi.csv")
# relatedNaics <- core_poi %>% dplyr::select(brands,naics_code) %>% distinct() %>% drop_na()
#
# relatedBrandWithNaics <-
# relatedBrand %>%
# left_join(relatedNaics,by=c("relatedBrand" = "brands")) %>%
# drop_na() %>%
# group_by(naics_code) %>%
# summarise(totalVisitors = sum(totalVisitors)) %>%
# filter(totalVisitors>365)
#
# relatedBrandWithNaics$naics_code <- as.factor(relatedBrandWithNaics$naics_code)
# st_write(relatedBrandWithNaics,"data/output/relatedBrandWithNaics.csv")
relatedBrandWithNaics <- st_read("data/output/relatedBrandWithNaics.csv")
## Reading layer `relatedBrandWithNaics' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\relatedBrandWithNaics.csv'
## using driver `CSV'
relatedBrandWithNaics$totalVisitors <- as.numeric(relatedBrandWithNaics$totalVisitors)
relatedBrandWithNaics%>%
arrange(-totalVisitors) %>%
mutate(naics_code = factor(naics_code, naics_code)) %>%
ggplot(aes(x=naics_code, y=totalVisitors)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="Naics Code", y="Total Visitors", size = 10) +
plotTheme(5,5)+
theme(
axis.text = element_text( size=5,angle = 90),
strip.text = element_text( size=5),
strip.text.x = element_text( size = 10))
Figure6.1.5 Total Vistors of Related Brand With Naics
# use the same naics code to get the safegraph poi data to construct naics code's location
# 445120 - Convenience Stores
# 722513 - Limited-Service Restaurants
# 722515 - Snack and Nonalcoholic Beverage Bars
# 452319 - All Other General Merchandise Stores
# 447110 - Gasoline Stations with Convenience Stores
# use number of visits as their attractiveness
pprRelatedNaicsMoves <- function(pprRelatedNaics, safeGraph.geo) {
safeGraph.geo$raw_visitor_counts = as.numeric(safeGraph.geo$raw_visitor_counts)
moves <- safeGraph.geo %>%
filter(naics_code==pprRelatedNaics) %>%
dplyr::select(placekey, location_name, raw_visitor_counts) %>%
group_by(placekey) %>%
summarise(visits = sum(raw_visitor_counts))
return(moves)
}
# centrality with RESTAURANT variable
pprRelatedNaics = 722513
restSurroundEffect <- pprRelatedNaicsMoves(pprRelatedNaics, safeGraph.geo)
# centrality with Convenient variable
pprRelatedNaics = 445120
convenientSurroundEffect <- pprRelatedNaicsMoves(pprRelatedNaics, safeGraph.geo)
# centrality with Park variable
parksSurroundEffect <- attracGEO
# visualized
# rbind(restSurroundEffect %>% mutate(label="restaurant") %>% dplyr::select(-visits),
# convenientSurroundEffect %>% mutate(label="convinient store")%>% dplyr::select(-visits),
# parksSurroundEffect %>% mutate(label="park")) %>%
# ggplot() +
# geom_sf(data=pprServiceArea,color='black',size=0.35,fill= "transparent")+
# geom_sf(data=pprDistrict,color='black',size=0.5,fill= "transparent")+
# geom_sf(color = palette3,fill = palette3,alpha = 0.3) +
# facet_wrap(~label)+
# mapTheme()+
# theme(legend.position = "bottom",
# legend.key.width = unit(0.5, 'cm'),
# legend.key.height = unit(0.2, 'cm'))
restSurroundDot <- restSurroundEffect %>% mutate(label="restaurant") %>% dplyr::select(-visits)
convenientSurroundDot <- convenientSurroundEffect %>% mutate(label="convinient store")%>% dplyr::select(-visits)
parksSurroundDot <- parksSurroundEffect %>% mutate(label="park")
# visualized three types of obejects in one map
ggplot() +
geom_sf(data=pprServiceArea,color='black',size=0.35,fill= "transparent")+
geom_sf(data=pprDistrict,color='black',size=0.5,fill= "transparent")+
geom_sf(data=restSurroundDot,aes(geometry=geometry),color = palette3[3],fill = palette3[3],alpha = 0.5,size = 1) +
geom_sf(data=convenientSurroundDot,aes(geometry=geometry),color = palette3[1],fill = palette3[1],alpha = 0.5,size = 1) +
geom_sf(data=parksSurroundDot,aes(geometry=geometry),color = palette3[2],fill = palette3[2],alpha = 1, size = 2) +
# facet_wrap(~label)+
mapTheme()+
theme(legend.position = "bottom",
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'),
legend.text = element_text( colour = "black", size = 8))
Figure6.1.6 Overlay Map of Parks, Restaurants and Convinient Stores
##### Prepare data
source("huffModelScripts.R")
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
################# for Shaun ##############
#source("huffModelScripts2.R")
## park info
# RealModelData <- st_read("data/output/RealModelData.csv") %>%
RealModelData <- huffWithSmallAttract %>%
mutate(distance = as.numeric(distance),
probability = as.numeric(probability),
PC1 = as.numeric(PC1),
PC2 =as.numeric(PC2),
PC3 =as.numeric(PC3),
PC4 =as.numeric(PC4),
PC5 =as.numeric(PC5))
# transform PC to positive values
RealModelData <- RealModelData %>%
mutate(PC1 = PC1-min(RealModelData$PC1),
PC2 = PC2-min(RealModelData$PC2),
PC3 = PC3-min(RealModelData$PC3),
PC4 = PC4-min(RealModelData$PC4),
PC5 = PC5-min(RealModelData$PC5))
## park geometry
RealModelPlaces <- st_read("data/output/attracGEO.GeoJSON") %>%
filter(placekey %in% unique(RealModelData$placekey))
## Reading layer `attracGEO' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\attracGEO.GeoJSON'
## using driver `GeoJSON'
## Simple feature collection with 351 features and 1 field
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75 ymin: 40 xmax: -75 ymax: 40
## Geodetic CRS: WGS 84
## neighbor attr and geometry - Here is convenient stores
convenientSurroundEffect <- st_read("data/output/convenientSurroundEffect.GeoJSON")
## Reading layer `convenientSurroundEffect' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\convenientSurroundEffect.GeoJSON'
## using driver `GeoJSON'
## Simple feature collection with 255 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75 ymin: 40 xmax: -75 ymax: 40
## Geodetic CRS: WGS 84
### Model Function
fit <- fit_parameter(data = RealModelData,
places = RealModelPlaces,
neighbor_data = convenientSurroundEffect,
neighbor_id_column="placekey",
neighbor_attr_column ="visits",
id_column="placekey",
attr_column =c("PC1","PC2","PC3","PC4","PC5"),
distance_column="distance",
probability_column="probability",
origin_column = "GEOID",
k=3)
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(neighbor_id_column)` instead of `neighbor_id_column` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(neighbor_attr_column)` instead of `neighbor_attr_column` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(id_column)` instead of `id_column` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(probability_column)` instead of `probability_column` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(origin_column)` instead of `origin_column` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(col_list)` instead of `col_list` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
parameter <- fit %>%
dplyr::select(parameter) %>%
unnest(cols="parameter")
data <- fit %>%
dplyr::select(data)
mean(parameter$r2)
## [1] 0.12
# x <- centrality(destination="222-222@628-pg9-2hq",
# data = modelData %>% filter(placekey!="zzw-222@628-pj6-9pv"),
# places = modelPlaces,
# neighbor_data = modelPlaces,
# neighbor_id_column="placekey",
# neighbor_attr_column ="attr",
# id_column="placekey",
# k=3)
#
# x <- centrality_to_df(data = modelData %>% filter(placekey!="zzw-222@628-pj6-9pv"),
# places = modelPlaces,
# neighbor_data = modelPlaces,
# neighbor_id_column="placekey",
# neighbor_attr_column ="attr",
# id_column="placekey",
# k=3)